This notebook aims to identify what correlations, if any, exist between the number of meteorite landings and the size and location of those landings.
import os

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display
from shapely.geometry import Point
%matplotlib inline
# Load the dataset.
# The CSV location is specific to one machine, so it can be overridden by
# setting the METEORITE_LANDINGS_CSV environment variable. When the variable
# is not set, the original absolute path is used and the cell behaves exactly
# as before.
DATA_PATH = os.environ.get(
    'METEORITE_LANDINGS_CSV',
    '/Users/heatheradler/Documents/GitHub/Springboard/Springboard_Projects/Data Storytelling/Meteorite_Landings.csv',
)
df = pd.read_csv(DATA_PATH)
We start by exploring the dataset to understand its structure and clean it if necessary.
# Give the mass and coordinate columns descriptive, code-friendly names.
# The rename returns a new frame that is bound back to `df` (no in-place
# mutation), so re-running the cell is harmless: labels that are already
# renamed are simply not found and are ignored.
column_renames = {
    "mass (g)": "mass_in_grams",
    "reclat": "latitude",
    "reclong": "longitude",
}
df = df.rename(columns=column_renames)
Next, we remove the rows that are missing a mass, latitude, or longitude value and replace the remaining null values with 0, so that there are no longer any missing values.
# Keep only the rows that have a mass and both coordinates, then replace every
# remaining null with 0.
# The two steps are chained instead of calling fillna(..., inplace=True) on the
# dropna result: the resulting frame is the same, but pandas no longer emits a
# SettingWithCopyWarning for modifying what it considers a slice of `df`.
required_columns = ['mass_in_grams', 'latitude', 'longitude']
df_1 = df.dropna(subset=required_columns).fillna(0)
/var/folders/9q/j_dp7nx93lbc_t5zbbshxpch0000gn/T/ipykernel_82899/497208440.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_1.fillna(0, inplace=True)
# Create Point geometries.
# points_from_xy builds the whole geometry array in one vectorised call
# (x = longitude, y = latitude) instead of constructing one shapely Point per
# row in a Python loop.
geometry = gpd.points_from_xy(df_1['longitude'], df_1['latitude'])
# Attach the geometries to the cleaned landings, declared as WGS 84 lon/lat.
gdf_points = gpd.GeoDataFrame(df_1, geometry=geometry, crs='EPSG:4326')
We'll perform various analyses to explore the dataset more deeply.
# Create Point geometries (x = longitude, y = latitude) in one vectorised call.
geometry = gpd.points_from_xy(df_1['longitude'], df_1['latitude'])
gdf_points = gpd.GeoDataFrame(df_1, geometry=geometry, crs='EPSG:4326')

# Load world countries shapefile.
# NOTE(review): geopandas reports that the gpd.datasets module is deprecated
# and will be removed in GeoPandas 1.0; point this at a downloaded Natural
# Earth file before upgrading.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Perform spatial join.
# The country name is carried through the join together with the ISO code.
# The earlier approach looked the name up afterwards by merging on iso_a3; a
# key that is not unique in the countries table duplicates landings and
# attaches the wrong name to them.
# NOTE(review): Natural Earth is known to use a '-99' placeholder code for
# several countries - confirm against `world['iso_a3'].value_counts()`.
countries = world[['geometry', 'iso_a3', 'name']].rename(columns={'name': 'country_name'})
# `predicate` replaces the deprecated `op` keyword.
joined = gpd.sjoin(gdf_points, countries, predicate='within')
# Keep a single match per landing in case a point falls inside more than one
# polygon; otherwise the index-aligned assignments below would fail.
joined = joined[~joined.index.duplicated(keep='first')]

# ISO code per landing; landings outside every country polygon get NaN
# through index alignment.
country_names = joined['iso_a3']
# assign() returns a new frame, so no SettingWithCopyWarning is raised.
df_1 = df_1.assign(country=country_names)

# Final analysis frame: one row per landing, fresh 0..n-1 index.
# The `name` -> `name_x` rename keeps the column label that the previous
# merge-based implementation produced, so any cell relying on it still works
# (the rename is a no-op when there is no `name` column).
merged_df = (
    df_1
    .assign(country_name=joined['country_name'])
    .rename(columns={'name': 'name_x'})
    .reset_index(drop=True)
)
/var/folders/9q/j_dp7nx93lbc_t5zbbshxpch0000gn/T/ipykernel_82899/294049908.py:6: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
/Users/heatheradler/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3466: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
if await self.run_code(code, result, async_=asy):
/var/folders/9q/j_dp7nx93lbc_t5zbbshxpch0000gn/T/ipykernel_82899/294049908.py:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_1['country'] = country_names
# Create folium map
# Only landings with a known country, position and mass are plotted.
map_columns = ['country_name', 'latitude', 'longitude', 'mass_in_grams']
df1_filtered = merged_df[map_columns].dropna(subset=map_columns).copy()
# Coordinate pairs in [lat, lon] order, as folium expects; computed once and
# reused for the initial centre and for the bounds.
coords = df1_filtered[['latitude', 'longitude']]

f = folium.Figure(width=1000, height=500)
m = folium.Map(location=coords.mean().values.tolist()).add_to(f)

# itertuples avoids building a pandas Series for every row, which matters
# because one marker is added per landing.
for landing in df1_filtered.itertuples(index=False):
    folium.CircleMarker(
        location=[landing.latitude, landing.longitude],
        radius=1,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        # Popups are rendered as HTML, where a bare newline does not break the
        # line, so <br> is used to put the mass on its own line.
        popup=f'Name: {landing.country_name}<br>Mass: {landing.mass_in_grams}g',
    ).add_to(m)

# Zoom the map so that every plotted landing is visible.
m.fit_bounds([coords.min().values.tolist(), coords.max().values.tolist()])
# Number of landings per country (count of non-null `country` codes within
# each country name), ordered from most to fewest landings.
df1_country_count = (
    merged_df
    .groupby('country_name')
    .agg(meteorite_landings=('country', 'count'))
    .sort_values('meteorite_landings', ascending=False)
    .reset_index()
)
# Show the ten countries with the most landings.
df1_country_count.head(10)
| country_name | meteorite_landings | |
|---|---|---|
| 0 | Antarctica | 22097 |
| 1 | Oman | 3093 |
| 2 | United States of America | 1652 |
| 3 | Libya | 1474 |
| 4 | Australia | 632 |
| 5 | Algeria | 624 |
| 6 | Chile | 359 |
| 7 | Kenya | 227 |
| 8 | Morocco | 218 |
| 9 | India | 131 |
# Horizontal bar chart of the ten countries with the most landings.
# The top-ten slice is re-sorted in ascending order before it is plotted.
top_countries = df1_country_count.head(10).sort_values(by='meteorite_landings', ascending=True)
axis_labels = {
    'meteorite_landings': 'Meteorite Landings Count',
    'country_name': 'Country',
}
fig = px.bar(
    top_countries,
    x='meteorite_landings',
    y='country_name',
    orientation='h',
    title="Meteorite Landings by Country",
    labels=axis_labels,
)
fig.show()
# The ten heaviest landings, with the mass cast to whole grams because it
# drives the marker size in the scatter plot below.
df1_by_mass = (
    merged_df
    .sort_values(by='mass_in_grams', ascending=False)
    .head(10)
    .assign(mass_in_grams=lambda heaviest: heaviest['mass_in_grams'].astype(int))
)

# One marker per landing: year on the x axis, country on the y axis,
# coloured by country code and sized by mass.
fig = px.scatter(
    df1_by_mass,
    x="year",
    y="country_name",
    color="country",
    size='mass_in_grams',
    width=1000,
    height=400,
)
layout_options = dict(
    title="<b>Scatter Plot of Top 10 Heaviest Meteorite Landings</b>",
    xaxis_title="<b>Year of Meteorite Landings</b>",
    yaxis_title="<b>Country</b>",
    legend_title="Country",
)
fig.update_layout(**layout_options)
fig.show()
(1) The heaviest meteorite landing was found in Namibia (1920).
(2) Out of the top 10 heaviest meteorite landings, Namibia and Mexico recorded 2 each.
(3) Out of the top 10 heaviest meteorite landings, the oldest recorded was in Argentina (1575).
# Landings per year, ordered chronologically; reset_index turns the counts
# into a two-column frame, plotted below as `year` against `count`.
yearly_counts = merged_df['year'].value_counts().sort_index().reset_index()

fig = px.bar(yearly_counts, x='year', y='count', width=1000, height=400)
fig.update_layout(
    title="<b>Bar Plot of Yearly Meteorite Landings</b>",
    # Restrict the visible x axis to 1950-2023.
    xaxis=dict(range=[1950, 2023], title="<b>Year of Meteorite Landings</b>"),
    yaxis=dict(title="<b>Total Meteorite Landings</b>"),
)
fig.update_traces(marker_color='green')
fig.show()
The count of meteorite landings reached its peak in 1979. Since then, the meteorite landing count has fluctuated with highs in 1988 and 1998.
# Share of landings per meteorite class, as a whole-number percentage.
threshold_percentage = 5  # classes below this rounded share are left out of the pie

df1_class = merged_df['recclass'].value_counts().reset_index()
total_count = df1_class['count'].sum()
df1_class['percentage'] = (df1_class['count'] / total_count * 100).round(0)

# Keep only the classes whose rounded share reaches the threshold.
filtered_df = df1_class.loc[df1_class['percentage'] >= threshold_percentage]

fig = px.pie(
    filtered_df,
    values='percentage',
    names='recclass',
    width=600,
    height=600,
)
fig.update_layout(title="<b>Meteorite Landings By Class Type</b>")
# Label each slice with its class name and its percentage value.
fig.update_traces(textposition='inside', texttemplate='%{label}<br>%{value}%')
fig.show()
# Mean mass and number of landings per meteorite class.
df1_class = (
    merged_df
    .groupby('recclass')['mass_in_grams']
    .agg(mass_in_grams='mean', count='count')
    .reset_index()
)
# Mean mass converted from grams to kilograms, rounded to one decimal place.
df1_class['mass_in_kg'] = (df1_class['mass_in_grams'] * 0.001).round(1)
# Keep the ten classes with the most landings, most frequent first.
df1_class = df1_class.sort_values(by='count', ascending=False).head(10)

# Bars show the average mass (left axis); the line shows the class count
# (second y axis on the right).
class_labels = df1_class['recclass']
bar_trace = go.Bar(
    x=class_labels,
    y=df1_class['mass_in_kg'],
    name='Average Mass of Meteorite (kg)',
)
scatter_trace = go.Scatter(
    x=class_labels,
    y=df1_class['count'],
    mode='markers+lines',
    name='Meteorite Class Count',
    yaxis='y2',
)

fig = go.Figure(data=[bar_trace, scatter_trace])
layout_options = dict(
    xaxis=dict(title='<b>Meteorite Class</b>'),
    yaxis=dict(title='<b>Average Mass of Meteorite (kg)</b>'),
    yaxis2=dict(title='<b>Meteorite Class Count</b>', overlaying='y', side='right'),
    title="<b>Bar Plot of Average Meteorite Mass (in kg) by Class, Sorted by Class Count</b>",
    legend=dict(orientation='h', yanchor='top', y=1.15, xanchor='right', x=1),
    width=1000,
    height=400,
)
fig.update_layout(**layout_options)
fig.show()